In [5]:
import pandas as pd
import altair as alt
import numpy as np
In [6]:
# Load the filtered case export and preview its first rows.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the preview look like
# index columns written by earlier to_csv calls - confirm, and drop them if unused.
DATA_PATH = 'ds4200_filtered_data2.csv'
data = pd.read_csv(DATA_PATH)
data.head(n=5)
Out[6]:
Unnamed: 0.1 Unnamed: 0 case_enquiry_id year case_title neighborhood
0 0 0 101003148628 2020 Tree Maintenance Requests Hyde Park
1 1 1 101003148656 2020 Tree Maintenance Requests Charlestown
2 2 2 101003149796 2020 Tree Maintenance Requests Allston / Brighton
3 3 3 101003149821 2020 Tree Maintenance Requests Brighton
4 4 4 101003149973 2020 New Tree Requests Charlestown
In [7]:
# Inspect column names, dtypes, and non-null counts to confirm the file loaded
# with the expected columns and no missing entries.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91236 entries, 0 to 91235
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Unnamed: 0.1     91236 non-null  int64 
 1   Unnamed: 0       91236 non-null  int64 
 2   case_enquiry_id  91236 non-null  int64 
 3   year             91236 non-null  int64 
 4   case_title       91236 non-null  object
 5   neighborhood     91236 non-null  object
dtypes: int64(4), object(2)
memory usage: 4.2+ MB
In [8]:
# Tally every case type; dropna=False keeps NaN as its own bucket so any
# missing case_title values would show up in the listing.
case_title_counts = data['case_title'].value_counts(dropna=False)
case_title_counts
Out[8]:
Tree Maintenance Requests    56188
New Tree Requests            19212
Tree Emergencies             15836
Name: case_title, dtype: int64
In [9]:
# Tally every neighborhood label as loaded; dropna=False keeps NaN as its own
# bucket so missing neighborhoods would be visible.
neighborhood_counts_raw = data['neighborhood'].value_counts(dropna=False)
neighborhood_counts_raw
Out[9]:
Dorchester                                      12863
West Roxbury                                     9751
South End                                        6573
Roxbury                                          6334
South Boston / South Boston Waterfront           5994
Jamaica Plain                                    5973
East Boston                                      5673
Roslindale                                       5293
Allston / Brighton                               5276
Hyde Park                                        4756
Greater Mattapan                                 4393
Charlestown                                      4238
Back Bay                                         3264
Fenway / Kenmore / Audubon Circle / Longwood     2182
Boston                                           2097
Beacon Hill                                      2080
Downtown / Financial District                    1906
Mission Hill                                     1365
South Boston                                      553
Brighton                                          408
Allston                                           145
Mattapan                                          117
Chestnut Hill                                       2
Name: neighborhood, dtype: int64
In [10]:
# Convert blank neighborhood entries to NaN so the following dropna() removes them.
# The anchored regex matches the empty string and any whitespace-only string;
# an exact match on ' ' would miss '' and multi-space values.
data['neighborhood'] = data['neighborhood'].replace(r'^\s*$', np.nan, regex=True)
In [11]:
# Discard every row that has a missing value in any column
# (not only in 'neighborhood').
data = data.dropna(axis='index', how='any')
In [12]:
# Re-tally neighborhoods after the drop; a NaN bucket here would mean
# missing values survived the cleanup.
neighborhood_counts_after_dropna = data['neighborhood'].value_counts(dropna=False)
neighborhood_counts_after_dropna
Out[12]:
Dorchester                                      12863
West Roxbury                                     9751
South End                                        6573
Roxbury                                          6334
South Boston / South Boston Waterfront           5994
Jamaica Plain                                    5973
East Boston                                      5673
Roslindale                                       5293
Allston / Brighton                               5276
Hyde Park                                        4756
Greater Mattapan                                 4393
Charlestown                                      4238
Back Bay                                         3264
Fenway / Kenmore / Audubon Circle / Longwood     2182
Boston                                           2097
Beacon Hill                                      2080
Downtown / Financial District                    1906
Mission Hill                                     1365
South Boston                                      553
Brighton                                          408
Allston                                           145
Mattapan                                          117
Chestnut Hill                                       2
Name: neighborhood, dtype: int64
In [13]:
# Collapse the combined neighborhood labels into a single name each:
#   'South Boston / South Boston Waterfront' -> 'South Boston'
#   'Allston / Brighton'                     -> 'Brighton'
# NOTE(review): standalone 'Allston' rows, and 'Mattapan' vs 'Greater Mattapan',
# are left as separate labels - confirm that is intended.
NEIGHBORHOOD_RENAMES = {
    'South Boston / South Boston Waterfront': 'South Boston',
    'Allston / Brighton': 'Brighton',
}
data['neighborhood'] = data['neighborhood'].replace(NEIGHBORHOOD_RENAMES)
In [14]:
# Re-tally neighborhoods to verify the combined labels were folded into
# 'South Boston' and 'Brighton'.
neighborhood_counts_merged = data['neighborhood'].value_counts(dropna=False)
neighborhood_counts_merged
Out[14]:
Dorchester                                      12863
West Roxbury                                     9751
South End                                        6573
South Boston                                     6547
Roxbury                                          6334
Jamaica Plain                                    5973
Brighton                                         5684
East Boston                                      5673
Roslindale                                       5293
Hyde Park                                        4756
Greater Mattapan                                 4393
Charlestown                                      4238
Back Bay                                         3264
Fenway / Kenmore / Audubon Circle / Longwood     2182
Boston                                           2097
Beacon Hill                                      2080
Downtown / Financial District                    1906
Mission Hill                                     1365
Allston                                           145
Mattapan                                          117
Chestnut Hill                                       2
Name: neighborhood, dtype: int64
In [15]:
 
In [16]:
 
Out[16]:
In [113]:
# Build two cross-filtered Altair views of the case data:
#   * line_chart - number of cases per year, one line per case type
#   * bar        - number of cases per neighborhood, stacked by case type
# Dragging an interval on the line chart filters the bars, clicking bars filters
# the lines, and a dropdown restricts both views to a single case type.
# (altair is already imported as `alt` in the notebook's import cell.)

# Lift Altair's default row limit so the full table can be embedded in the chart spec.
alt.data_transformers.disable_max_rows()

# Interval brush drawn on the line chart; used to filter the bar chart.
brush = alt.selection_interval()
# Click selection on bar rows (the y encoding, i.e. neighborhood); used to filter
# the line chart. selection_point replaces the deprecated selection_multi.
bar_brush = alt.selection_point(encodings=['y'])

# Dropdown choices: None (labelled 'All') clears the filter, followed by each case type.
case_types = list(data['case_title'].unique())
options = [None] + case_types
labels = ['All'] + case_types

# Dropdown bound to a point selection on case_title; filters both charts.
rating_radio = alt.binding_select(options=options, labels=labels, name="Case Type: ")
rating_select = alt.selection_point(fields=['case_title'], bind=rating_radio)

# Line chart: year on the x-axis, number of case calls on the y-axis.
line_chart = alt.Chart(
    data,
    title=alt.Title('311 Case Calls from 2011 to 2023'),
).mark_line(point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('count(case_title)', title='Number of Case Calls'),
    # Marks inside the brush keep their case-type color; the rest turn gray.
    color=alt.condition(
        brush,
        alt.Color('case_title:N').title('Case Type'),
        alt.value('lightgray'),
    ),
    tooltip=['case_title', 'year', 'count(case_title)'],
).add_params(
    brush, rating_select
).transform_filter(
    bar_brush
).transform_filter(
    rating_select
)

# Bar chart: neighborhood on the y-axis (largest total first), number of case
# calls on the x-axis.
bar = alt.Chart(
    data,
    title=alt.Title('311 Case Calls by Neighborhood'),
).mark_bar().encode(
    # NOTE(review): sorting a quantitative axis by the color channel is unusual and
    # may have no visible effect - confirm whether an `order` encoding was intended.
    x=alt.X('count(case_title)', title='Number of Case Calls').sort('color'),
    y=alt.Y('neighborhood', title='Neighborhood').sort('-x'),
    # Clicked bars keep their case-type color; unselected bars turn gray.
    color=alt.condition(
        bar_brush,
        alt.Color('case_title:N').title('Case Type'),
        alt.value('lightgray'),
    ),
    tooltip=['neighborhood', 'case_title', 'count(case_title)'],
).add_params(
    bar_brush, rating_select
).transform_filter(
    brush
).transform_filter(
    rating_select
)
In [116]:
# Show the neighborhood bar chart and the yearly line chart side by side.
alt.hconcat(bar, line_chart)
Out[116]:
In [117]:
 
In [ ]: